# import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

######################
### EITC SUBGROUPS ###
######################

# below dataset comes from copying final_NC_cov_est_full_subgroups_align_V3.txt into an Excel worksheet, separating cells by spaces, and adding the column headers "Term" and "Value"
# see recalibration_v2.py
eitc_subgroup_df = pd.read_excel("/REDACTED/final_recalibration_subgroups_align_V3.xlsx")

# Keep only the disparity-estimate rows (terms D_l, D, D_p) and the two columns used downstream.
_eitc_term_mask = eitc_subgroup_df["Term"].isin(["D_l", "D", "D_p"])
eitc_subgroup_df = eitc_subgroup_df.loc[_eitc_term_mask, ["Term", "Value"]].reset_index(drop=True)

# The labels below are purely positional: they assume the file lists 72 estimate
# rows as 6 consecutive groups of 12. Within each group the first 6 rows get
# Weighted_Col == 0 and the last 6 get Weighted_Col == 1; NC_weights flips every
# 3 rows.
# NOTE(review): the row order is not checked against the file -- confirm it still
# holds whenever the spreadsheet is regenerated.
eitc_subgroup_df = eitc_subgroup_df.assign(
    Weighted_Col=([0] * 6 + [1] * 6) * 6,
    NC_weights=([0] * 3 + [1] * 3) * 12,
)

# Only the Weighted_Col == 1 rows are kept; the helper column is dropped afterwards.
eitc_subgroup_df = eitc_subgroup_df.loc[eitc_subgroup_df['Weighted_Col'] == 1].drop(columns='Weighted_Col')

# Subgroup labels, in the order the groups are assumed to appear in the file.
subgroup_name = [
    'Single EITC',
    'Joint EITC',
    'Single Male EITC',
    'Single Female EITC',
    'Single Male EITC with Deps',
    'Single Male EITC without Deps',
]
# Six rows remain per group after the filter above, so each name is repeated six times.
eitc_subgroup_df['Subgroup'] = list(np.repeat(subgroup_name, 6))


def _eitc_wide_table(long_df, nc_weight_flag):
    """Return one row per subgroup with one column per term.

    Only rows of ``long_df`` whose ``NC_weights`` equals ``nc_weight_flag`` are
    used. Rows of the result follow the order of ``subgroup_name`` and
    ``Subgroup`` is returned as a regular column.
    """
    selected = long_df[long_df['NC_weights'] == nc_weight_flag]
    wide = pd.pivot(selected, index=['Subgroup'], columns='Term', values='Value')
    return wide.loc[subgroup_name].reset_index()


# Split into results without (0) and with (1) NC weights.
eitc_subgroup_df_unwgt = _eitc_wide_table(eitc_subgroup_df, 0)
eitc_subgroup_df_wgt = _eitc_wide_table(eitc_subgroup_df, 1)

# Plot the recalibrated disparity by EITC subgroup, once per weighting scheme
# (unweighted first, then weighted). Both figures share the same layout and
# differ only in the table plotted and the output file.
for disparity_table, output_path in (
    (eitc_subgroup_df_unwgt, '/REDACTED/recalibrated_disp_eitc_subgroup_unweighted.png'),
    (eitc_subgroup_df_wgt, '/REDACTED/recalibrated_disp_eitc_subgroup_weighted.png'),
):
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True, figsize=(10, 8))
    subgroup_labels = disparity_table['Subgroup']

    # Values are scaled by 100 to match the percentage-point axis label.
    # Only the D_l and D_p columns are drawn; the D column is not plotted.
    for term, series_label in (('D_l', "Linear Estimator"), ('D_p', "Probabilistic Estimator")):
        ax.scatter(subgroup_labels, disparity_table[term] * 100, label=series_label)

    # One tick per subgroup, tilted so the long names do not overlap.
    plt.xticks(subgroup_labels)
    plt.xticks(rotation=30)

    ax.set_xlabel("EITC Subgroup")
    ax.set_ylabel("Black/Non-Black Disparity (Percentage Points)")
    ax.set_ylim(0, 7)
    ax.legend(loc="upper left")

    # Write the PNG, then display and release the figure.
    plt.savefig(output_path, facecolor='white', bbox_inches='tight')
    plt.show()
    plt.close()


###################
### INCOME BINS ###
###################

# below dataset comes from copying final_NC_cov_est_full_bins_align_V3.txt into an Excel worksheet, separating cells by spaces, and adding the column headers "Term" and "Value"
# see recalibration_v2.py
income_bin_df = pd.read_excel('/REDACTED/final_recalibration_bins_align_V3.xlsx')

# Keep the disparity estimates plus the bin_20_mean rows, and only the two columns used downstream.
_income_term_mask = income_bin_df["Term"].isin(["D_l", "D", "D_p", "bin_20_mean"])
income_bin_df = income_bin_df.loc[_income_term_mask, ["Term", "Value"]].reset_index(drop=True)

# Labels are positional: the file is assumed to hold 160 rows laid out as 20
# consecutive bins of 8 rows each, where the first 4 rows of a bin get
# NC_weights == 0 and the last 4 get NC_weights == 1.
# NOTE(review): the row order is not checked against the file -- confirm it still
# holds whenever the spreadsheet is regenerated.
income_bin_df['Bin'] = list(np.repeat(list(range(1, 20 + 1)), 8))
income_bin_df['NC_weights'] = ([0] * 4 + [1] * 4) * 20


def _income_bin_wide_table(long_df, nc_weight_flag):
    """Return one row per bin with one column per term.

    Only rows of ``long_df`` whose ``NC_weights`` equals ``nc_weight_flag`` are
    used, and bins whose ``bin_20_mean`` exceeds 200000 are excluded from the
    result.
    """
    selected = long_df[long_df["NC_weights"] == nc_weight_flag]
    wide = pd.pivot(selected, index=['Bin'], columns='Term', values='Value').reset_index()
    return wide[wide['bin_20_mean'] <= 200000]


# Split into results without (0) and with (1) NC weights.
income_bin_df_unwgt = _income_bin_wide_table(income_bin_df, 0)
income_bin_df_wgt = _income_bin_wide_table(income_bin_df, 1)

# Plot the recalibrated disparity against bin_20_mean, once per weighting scheme
# (unweighted first, then weighted). The two figures differ in the table
# plotted, the y-axis range (the weighted plot extends down to -0.5) and the
# output file.
for disparity_table, y_limits, output_path in (
    (income_bin_df_unwgt, (0, 5), '/REDACTED/recalibrated_disp_income_bin_unweighted.png'),
    (income_bin_df_wgt, (-.5, 5), '/REDACTED/recalibrated_disp_income_bin_weighted.png'),
):
    fig, ax = plt.subplots(1, 1, sharex=True, sharey=True, figsize=(10, 8))
    bin_mean_income = disparity_table['bin_20_mean']

    # Values are scaled by 100 to match the percentage-point axis label.
    # Only the D_l and D_p columns are drawn; the D column is not plotted.
    for term, series_label in (('D_l', "Linear Estimator"), ('D_p', "Probabilistic Estimator")):
        ax.scatter(bin_mean_income, disparity_table[term] * 100, label=series_label)

    # Tick positions are left to matplotlib's defaults.
    ax.set_ylim(*y_limits)
    ax.set_xlabel("Reported Income ($)")
    ax.set_ylabel("Black/Non-Black Disparity (Percentage Points)")
    ax.legend()

    # Write the PNG, then display and release the figure.
    plt.savefig(output_path, facecolor='white')
    plt.show()
    plt.close()